/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.discovery;

import java.util.Arrays;
import java.util.Collection;

import mosdi.fa.Alphabet;
import mosdi.index.SuffixTree;
import mosdi.index.SuffixTreeWalker;
import mosdi.util.BitArray;
import mosdi.util.IupacStringConstraints;
import mosdi.util.Log;
import mosdi.util.iterators.AbelianPatternInstanceIterator;
import mosdi.util.iterators.IupacPatternIterator;
import mosdi.util.iterators.LexicographicalIterator;
import mosdi.util.iterators.RangeIterator;

public class MotifFinder {
	
	private SuffixTree suffixTree;
	private BitArray[] generalizedAlphabet;

	// stuff used in current search
	private boolean calculateMultiplicity;
	private int multiplicityLeft;
	private int[] abelianPattern;
	private IupacStringConstraints constraints;
	private int length;
	// string currently under investigation
	private int[] string;
	LexicographicalIterator iterator;
	
	private int[] checkCounts;
	private int[] skipCounts;
	
	private boolean considerReverse;
	
	/** Constructor.
	 *  @param suffixTree          Suffix tree of the strings to be searched. If 
	 *                             considerReverse is true, then the suffix tree 
	 *                             must contain the reverse complementary sequences
	 *                             as well.
	 *  @param generalizedAlphabet The (generalized) alphabet, i.e. contains characters that
	 *                             are sets of characters of the ordinary alphabet.
	 *  @param considerReverse     If true, an occurrence of the reverse complement is considered
	 *                             a match. Must be set in accordance with suffixTree, i.e. 
	 *                             considerReverse iff suffixTree also contains reverse complement
	 *                             of sequences. 
	 */
	public MotifFinder(SuffixTree suffixTree, BitArray[] generalizedAlphabet, boolean considerReverse) {
		this.suffixTree = suffixTree;
		this.generalizedAlphabet = generalizedAlphabet;
		this.considerReverse = considerReverse;
	}
	
	/** Class meant as an API to be used by classes implementing the SearchSpecification interface. */
	public class SearchState {
		/** By calling this method, an instance of Condition can request
		 *  multiplicities to be calculated. */
		public void requestMultiplicity() { calculateMultiplicity = true; }
		public int getMultiplicityLeft() { return multiplicityLeft; }
		public BitArray[] getGeneralizedAlphabet() { return generalizedAlphabet; }
		/** Returns the abelian pattern currently searched. */
		public int[] getAbelianPattern() { return abelianPattern; }
		/** Returns instance of abelian pattern that is currently under
		 *  investigation.
		 *  NOTE: The node set given to SearchSpecification.check() always corresponds
		 *        to the prefix of length getDepth() of this pattern. 
		 */
		public int[] getPattern() { return string; }
		public IupacStringConstraints getIupacConstraints() { return constraints; }
		/** Length of strings in search space. */
		public int getStringLength() { return length; }
		/** Returns whether motif is considered jointly with its reverse complement. */
		public boolean isReverseConsidered() { return considerReverse; }
	}
	
	/** Abstracts a condition for patterns which are to be found. */
	public interface SearchSpecification {
		/** Initialization method called once before search starts.
		 *  @param searchState Through this object information about the current search
		 *                     can be obtained. 
		 */
		public void initialize(SearchState searchState);
		/** Called in the course of the iteration over pattern space whenever
		 *  the pattern has changed.
		 *  
		 *  @param newCharacter Gives the new character at leftmostChangedPosition, i.e.
		 *                      after the call, a prefix of length leftmostChangedPosition+1
		 *                      is known.
		 */
		public void updatePattern(int newCharacter, int leftmostChangedPosition);
		/** Function that decides whether a pattern that matches the prefixes 
		 *  associated with the given nodes fulfills the condition. It must be 
		 *  ensured, that the prefix of given length has been made known through calls to
		 *  updatePattern.
		 *  
		 *  IMPORTANT: Must yield consistent results, meaning that if true
		 *             is returned, then the set of all nodes' parents must also
		 *             return true.
		 */
		public boolean check(int prefixLength, int[] nodes);
		/** When a candidate has been found, it is passed to this method. Note that
		 *  check() has not necessarily been called on this candidate. If the candidate
		 *  is interesting, this method should store/report it as a result. It must be
		 *  ensured, that whole pattern has been made known through calls to updatePattern.
		 */
		public void evaluateCandidate(int[] nodes);
		/** Returns interesting patterns found in evaluateCandidate. */
		public Collection<EvaluatedPattern> getResults();
	}

	private void reset() {
		calculateMultiplicity = false;
		multiplicityLeft = -1;
		abelianPattern = null;
		constraints = null;
		string = null;
		iterator = null;
		checkCounts = null;
		skipCounts = null;
	}
	
	/** Searches the space of all patterns that are instances of a given abelian pattern.
	 *  This method iterates over all these patterns (in alphabetical order) and, in parallel,
	 *  walks a suffix tree. The details of the search, e.g. the search condition, is to
	 *  be given in the form of a SearchSpecification. The collection of result is also done
	 *  by the given SearchSpecification.
	 *
	 *  @param abelianPattern Array of letter frequencies w.r.t. the generalized 
	 *         alphabet given in constructor call.
	 */
	public void findAbelianPatternInstances(int[] abelianPattern, SearchSpecification specification) {
		reset();
		this.abelianPattern = abelianPattern;
		iterator = new AbelianPatternInstanceIterator(abelianPattern);
		run(specification);
	}

	/** Searches the space of all IUPAC patterns that comply with given constraints.
	 *  This method iterates over all these patterns (in alphabetical order) and, in parallel,
	 *  walks a suffix tree. The details of the search, e.g. the search condition, is to
	 *  be given in the form of a SearchSpecification. The collection of result is also done
	 *  by the given SearchSpecification.
	 */
	public void findIupacPatterns(int length, IupacStringConstraints constraints, SearchSpecification specification) {
		reset();
		this.constraints = constraints;
		iterator = new IupacPatternIterator(length, constraints);
		run(specification);
	}

	/** Searches the space of all IUPAC patterns that comply with given constraints and lie
	 *  within the specified range, i.e. all patterns for with lowerBound<=pattern<upperBound.
	 */
	public void findIupacPatterns(int length, IupacStringConstraints constraints, SearchSpecification specification, int[] lowerBound, int[] upperBound) {
		if ((lowerBound==null) && (upperBound==null)) {
			findIupacPatterns(length, constraints, specification);
			return;
		}
		reset();
		this.constraints = constraints;
		iterator = new RangeIterator(new IupacPatternIterator(length, constraints),lowerBound,upperBound);
		run(specification);
	}
	
	private void run(SearchSpecification specification) {
		if (!iterator.hasNext()) throw new IllegalStateException("Iteration range is empty!");
		SearchState state = new SearchState();
		string = iterator.peek();
		length = string.length;
		checkCounts = new int[length+1];
		skipCounts = new int[length];
		specification.initialize(state);
		// if requested, create table of character multiplicities
		int[] multiplicities = null;
		int totalMultiplicity = -1;
		int[] multiplicityLeftTable = null;
		if (this.calculateMultiplicity) {
			multiplicities = new int[generalizedAlphabet.length];
			multiplicityLeftTable = new int[length+1];
			for (int i=0; i<generalizedAlphabet.length; ++i) multiplicities[i]=generalizedAlphabet[i].numberOfOnes();
			totalMultiplicity = 1;
			for (int i=0; i<abelianPattern.length; ++i) {
				for (int j=0; j<abelianPattern[i]; ++j) totalMultiplicity*=multiplicities[i];
			}
			multiplicityLeftTable[0] = totalMultiplicity;
		}
		SuffixTreeWalker walker = suffixTree.getWalker();
		int[] nodeIndices = null;
		// iterate over all possible patterns that match the abelian pattern in 
		// lexicographic order (allows us to skip patterns now and then...)
		while (iterator.hasNext()) {
			// string currently under investigation
			string = iterator.next();
			// start iteration here, i.e. recycle as much intermediate results as possible
			int i = iterator.getLeftmostChangedPosition();
			if (i<=3) {
				Log.println(Log.Level.VERBOSE, "== " + Alphabet.getIupacAlphabet().buildString(string).substring(0,i+1));
			}
			nodeIndices = walker.backward(i);
			// helper variable used to indicate that string has to change at this
			// position (and therefore some strings may be skipped)
			int skipPosition = -1;
			for (;i<length;++i) {
				if (calculateMultiplicity) {
					multiplicityLeft = multiplicityLeftTable[i]/multiplicities[string[i]];
					multiplicityLeftTable[i+1] = multiplicityLeft; 
				}
				BitArray generalizedCharacter = generalizedAlphabet[string[i]];
				nodeIndices = walker.forward(generalizedCharacter);
				if (nodeIndices.length==0) {
					skipPosition=i;
					break;
				}
				specification.updatePattern(string[i], i);
				// check if we can skip some strings
				if (i<length-1) {
					checkCounts[i+1] += 1;
					if (!specification.check(i+1,nodeIndices)) {	
						skipPosition=i;
						skipCounts[skipPosition] += 1;
						break;
					}
				}
			}
			if (skipPosition>=0) {
				iterator.skip(skipPosition);
				continue;
			}
			// if there is at least one match ...
			if (i==length) {
				checkCounts[length]+=1;
				specification.evaluateCandidate(nodeIndices);
			}
		}
		Log.printf(Log.Level.DEBUG, "skipCounts:  %s\n", Arrays.toString(skipCounts));
		Log.printf(Log.Level.DEBUG, "checkCounts: %s\n", Arrays.toString(checkCounts));
	}


}
